In [1]:
pip install pandas
Requirement already satisfied: pandas in ./opt/anaconda3/lib/python3.9/site-packages (1.4.4)
Requirement already satisfied: python-dateutil>=2.8.1 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas) (2022.1)
Requirement already satisfied: numpy>=1.18.5 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas) (1.21.5)
Requirement already satisfied: six>=1.5 in ./opt/anaconda3/lib/python3.9/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [3]:
pip install pymysql
Requirement already satisfied: pymysql in ./opt/anaconda3/lib/python3.9/site-packages (1.0.2)
Note: you may need to restart the kernel to use updated packages.
In [4]:
import os
import pymysql
import pandas as pd
host= os.getenv( 'MYSQL_HOST' )
conn= pymysql.connect(
    host=host,
    port=int (3306),
    user= "root",
    passwd='aryak123',
    db="INSURANCE",
    charset='utf8mb4' )
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
In [5]:
df1=pd.read_sql_query("SELECT d.money_insured as MONEY_INSURED, d.insurance_type AS INSURANCE_TYPE,  a.insurance_id AS INSURANCE_ID, b.status AS STATUS, f.P_Name as PatientName FROM insure a, claim_details b, insurance_type c, health_insurance d, person e, patient_info f WHERE a.claim_id = b.claim_id	AND b.status = 'TRUE'	AND a.insurance_id = c.insurance_id	AND c.insurance_type = d.insurance_type and e.person_insurance_id=c.insurance_id   and e.personID=f.Person_ID",conn)
/Users/aryakbodkhe/opt/anaconda3/lib/python3.9/site-packages/pandas/io/sql.py:762: UserWarning: pandas only support SQLAlchemy connectable(engine/connection) ordatabase string URI or sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider using SQLAlchemy
  warnings.warn(
In [6]:
df1.head()
Out[6]:
MONEY_INSURED INSURANCE_TYPE INSURANCE_ID STATUS PatientName
0 92254 PPO 6 TRUE Erina Wye
1 92254 PPO 149 TRUE Krystyna Quirk
2 74421 POS 141 TRUE Barbara-anne Parbrook
3 74421 POS 195 TRUE Chrisy Hessentaler
4 72931 EPO 131 TRUE Sherman Joint
In [7]:
df1.nunique()
Out[7]:
MONEY_INSURED       4
INSURANCE_TYPE      4
INSURANCE_ID      132
STATUS              1
PatientName       132
dtype: int64
In [8]:
df1.describe()
Out[8]:
INSURANCE_ID
count 152.000000
mean 95.190789
std 60.819212
min 1.000000
25% 37.750000
50% 92.000000
75% 151.250000
max 198.000000
In [9]:
px.histogram(df1, x="INSURANCE_TYPE",color="INSURANCE_TYPE", barmode='group')
In [10]:
df2=pd.read_sql_query("select * from hospital",conn)
/Users/aryakbodkhe/opt/anaconda3/lib/python3.9/site-packages/pandas/io/sql.py:762: UserWarning:

pandas only support SQLAlchemy connectable(engine/connection) ordatabase string URI or sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider using SQLAlchemy

In [11]:
df2.nunique()
Out[11]:
hospital_id         61
hospital_name       61
h_address           47
isunderinsurance     2
dtype: int64
In [12]:
df3=pd.read_sql_query("select * from final_information a, patient_info b where a.id_info=b.info_id ",conn)
/Users/aryakbodkhe/opt/anaconda3/lib/python3.9/site-packages/pandas/io/sql.py:762: UserWarning:

pandas only support SQLAlchemy connectable(engine/connection) ordatabase string URI or sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider using SQLAlchemy

In [13]:
df3.head()
Out[13]:
id_info zip city Person_ID gender Person_ID Info_ID Medical_History email Date_of_Birth Status P_Name
0 156 2131 Roslindale 188 male 1 156 Postprocedural cardiogenic shock, initial enc... esaul0@upenn.edu 1/22/1994 true Caye Barnsdale
1 147 2703 Attleboro 60 male 2 147 Puncture wound with foreign body of left elbow hkynge1@census.gov 7/8/2001 true Tessi Raywood
2 199 1721 Ashland 41 female 3 199 External constriction, right lower leg, subseq... gmolineaux2@zdnet.com 11/8/1995 false Wallie Ezzell
3 54 1003 Amherst 31 male 4 54 Vitreous abscess (chronic), right eye mdalliwater3@deliciousdays.com 2/13/2002 true Frederic Blake
4 49 2726 Somerset 11 male 5 49 Displ transverse fx shaft of r rad, 7thN smazin4@un.org 3/11/1998 true Jesse Young
In [14]:
df3.head()
Out[14]:
id_info zip city Person_ID gender Person_ID Info_ID Medical_History email Date_of_Birth Status P_Name
0 156 2131 Roslindale 188 male 1 156 Postprocedural cardiogenic shock, initial enc... esaul0@upenn.edu 1/22/1994 true Caye Barnsdale
1 147 2703 Attleboro 60 male 2 147 Puncture wound with foreign body of left elbow hkynge1@census.gov 7/8/2001 true Tessi Raywood
2 199 1721 Ashland 41 female 3 199 External constriction, right lower leg, subseq... gmolineaux2@zdnet.com 11/8/1995 false Wallie Ezzell
3 54 1003 Amherst 31 male 4 54 Vitreous abscess (chronic), right eye mdalliwater3@deliciousdays.com 2/13/2002 true Frederic Blake
4 49 2726 Somerset 11 male 5 49 Displ transverse fx shaft of r rad, 7thN smazin4@un.org 3/11/1998 true Jesse Young
In [15]:
df3.nunique()
Out[15]:
id_info            184
zip                157
city               148
Person_ID          173
gender               2
Person_ID          200
Info_ID            184
Medical_History    199
email              200
Date_of_Birth      197
Status               2
P_Name             188
dtype: int64
In [16]:
df3['gender'].value_counts()
Out[16]:
male      134
female     66
Name: gender, dtype: int64
In [17]:
# This function converts given date to age
from datetime import datetime, date
def age(born):
    born = datetime.strptime(born, "%m/%d/%Y").date()
    today = date.today()
    return today.year - born.year - ((today.month, 
                                      today.day) < (born.month, 
                                                    born.day))
df3['Age'] = df3['Date_of_Birth'].apply(age)
In [18]:
df3.head()
Out[18]:
id_info zip city Person_ID gender Person_ID Info_ID Medical_History email Date_of_Birth Status P_Name Age
0 156 2131 Roslindale 188 male 1 156 Postprocedural cardiogenic shock, initial enc... esaul0@upenn.edu 1/22/1994 true Caye Barnsdale 28
1 147 2703 Attleboro 60 male 2 147 Puncture wound with foreign body of left elbow hkynge1@census.gov 7/8/2001 true Tessi Raywood 21
2 199 1721 Ashland 41 female 3 199 External constriction, right lower leg, subseq... gmolineaux2@zdnet.com 11/8/1995 false Wallie Ezzell 27
3 54 1003 Amherst 31 male 4 54 Vitreous abscess (chronic), right eye mdalliwater3@deliciousdays.com 2/13/2002 true Frederic Blake 20
4 49 2726 Somerset 11 male 5 49 Displ transverse fx shaft of r rad, 7thN smazin4@un.org 3/11/1998 true Jesse Young 24
In [19]:
px.histogram(df3, x="gender", color="Status", barmode='group', pattern_shape="Status")
In [20]:
fig = px.box(df3, x="Age",points="all", color="Status", notched=True)
fig.show()
In [21]:
df5=pd.read_sql_query("select * from hospital",conn)
df6=pd.read_sql_query("select * from hospital where isunderinsurance='true'",conn)
/Users/aryakbodkhe/opt/anaconda3/lib/python3.9/site-packages/pandas/io/sql.py:762: UserWarning:

pandas only support SQLAlchemy connectable(engine/connection) ordatabase string URI or sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider using SQLAlchemy

/Users/aryakbodkhe/opt/anaconda3/lib/python3.9/site-packages/pandas/io/sql.py:762: UserWarning:

pandas only support SQLAlchemy connectable(engine/connection) ordatabase string URI or sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider using SQLAlchemy

In [22]:
df5.head()
Out[22]:
hospital_id hospital_name h_address isunderinsurance
0 1 Anna Jaques Hospital Newburyport, MA FALSE
1 2 Athol Hospital Athol, MA TRUE
2 3 Baystate Franklin Medical Center Greenfield, MA TRUE
3 4 Baystate Medical Center Springfield, MA TRUE
4 5 Baystate Noble Hospital Westfield, MA TRUE
In [23]:
df6.head()
Out[23]:
hospital_id hospital_name h_address isunderinsurance
0 2 Athol Hospital Athol, MA TRUE
1 3 Baystate Franklin Medical Center Greenfield, MA TRUE
2 4 Baystate Medical Center Springfield, MA TRUE
3 5 Baystate Noble Hospital Westfield, MA TRUE
4 7 Berkshire Medical Center Pittsfield, MA TRUE
In [24]:
a=df5["h_address"].value_counts()
b=df5["h_address"].unique()
fig = px.pie(df5, values=a, names=b)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()
In [25]:
a=df6["h_address"].value_counts()
b=df6["h_address"].unique()
fig = px.pie(df6, values=a, names=b)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()
In [26]:
df2=pd.read_sql_query("SELECT d.money_insured as MONEY_INSURED, d.insurance_type AS INSURANCE_TYPE,  a.insurance_id AS INSURANCE_ID, b.status AS STATUS, f.P_Name as PatientName FROM insure a, claim_details b, insurance_type c, health_insurance d, person e, patient_info f WHERE a.claim_id = b.claim_id	AND b.status = 'TRUE'	AND a.insurance_id = c.insurance_id	AND c.insurance_type = d.insurance_type and e.person_insurance_id=c.insurance_id   and e.personID=f.Person_ID",conn)
/Users/aryakbodkhe/opt/anaconda3/lib/python3.9/site-packages/pandas/io/sql.py:762: UserWarning:

pandas only support SQLAlchemy connectable(engine/connection) ordatabase string URI or sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider using SQLAlchemy

In [27]:
df2.head()
Out[27]:
MONEY_INSURED INSURANCE_TYPE INSURANCE_ID STATUS PatientName
0 92254 PPO 6 TRUE Erina Wye
1 92254 PPO 149 TRUE Krystyna Quirk
2 74421 POS 141 TRUE Barbara-anne Parbrook
3 74421 POS 195 TRUE Chrisy Hessentaler
4 72931 EPO 131 TRUE Sherman Joint
In [28]:
df1['INSURANCE_TYPE'].unique()
Out[28]:
array(['PPO', 'POS', 'EPO', 'HMO'], dtype=object)
In [30]:
sns.countplot(x='h_address',data=df6,order=pd.value_counts(df6['h_address']).iloc[:4].index)
plt.title('Hospitals Count Across Massachussetts')
Out[30]:
Text(0.5, 1.0, 'Hospitals Count Across Massachussetts')
In [ ]: